﻿using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Runtime.InteropServices;
using System.Runtime.InteropServices.ComTypes;
using Microsoft.Win32;

namespace Xnlab.Filio
{
    #region IFilter
    /// <summary>
    /// Managed declaration of the COM IClassFactory interface
    /// (IID 00000001-0000-0000-C000-000000000046). ComHelper obtains it from a
    /// dll's DllGetClassObject export and FilterLoader uses it to create the
    /// filter object.
    /// </summary>
    [ComVisible(false)]
    [ComImport, InterfaceType(ComInterfaceType.InterfaceIsIUnknown), Guid("00000001-0000-0000-C000-000000000046")]
    internal interface IClassFactory
    {
        /// <summary>
        /// Creates an object of the class this factory was obtained for.
        /// </summary>
        /// <param name="pUnkOuter">Outer unknown for aggregation; callers in this file pass null.</param>
        /// <param name="refiid">IID of the interface requested on the new object.</param>
        /// <param name="ppunk">Receives the created object.</param>
        void CreateInstance([MarshalAs(UnmanagedType.Interface)] object pUnkOuter, ref Guid refiid, [MarshalAs(UnmanagedType.Interface)] out object ppunk);

        /// <summary>
        /// Locks or unlocks the class object's server in memory.
        /// </summary>
        /// <param name="fLock">true to lock, false to unlock.</param>
        // The native parameter is a 4-byte Win32 BOOL. Inside a COM interface a
        // plain bool is marshaled as a 2-byte VARIANT_BOOL unless stated
        // otherwise, so the unmanaged type is spelled out explicitly.
        void LockServer([MarshalAs(UnmanagedType.Bool)] bool fLock);
    }

    /// <summary>
    /// Utility class to get a Class Factory for a certain Class ID 
    /// by loading the dll that implements that class
    /// </summary>
    internal static class ComHelper
    {
        // Managed signature of the DllGetClassObject function pointer that is
        // looked up by name in the loaded dll. Returns an HRESULT-style int;
        // the caller treats 0 as success.
        private delegate int DllGetClassObject(ref Guid ClassId, ref Guid InterfaceId, [Out, MarshalAs(UnmanagedType.Interface)] out object ppunk);

        // kernel32 entry points used to load/unload dlls and to resolve an
        // exported function by name.
        private class Win32NativeMethods
        {
            // Export names are ANSI strings, so the ANSI marshaling is intentional here.
            [DllImport("kernel32.dll", CharSet = CharSet.Ansi)]
            public static extern IntPtr GetProcAddress(IntPtr hModule, string lpProcName);

            [DllImport("kernel32.dll")]
            public static extern bool FreeLibrary(IntPtr hModule);

            // Bind to the wide-character entry point: with the default (ANSI)
            // marshaling, a dll path containing characters outside the active
            // code page is mangled and the load fails. SetLastError lets a
            // caller retrieve the Win32 error after a failed load.
            [DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)]
            public static extern IntPtr LoadLibrary(string lpFileName);
        }

        /// <summary>
        /// Collects the module handles of the dlls that were loaded and calls
        /// FreeLibrary on each of them when the instance is finalized.
        /// </summary>
        private class DllList
        {
            private List<IntPtr> _handles = new List<IntPtr>();

            /// <summary>
            /// Remembers a module handle so that the finalizer frees it later.
            /// </summary>
            /// <param name="dllHandle">Handle returned by LoadLibrary.</param>
            public void AddDllHandle(IntPtr dllHandle)
            {
                lock (_handles)
                {
                    _handles.Add(dllHandle);
                }
            }

            ~DllList()
            {
                // Best effort: a failure to free one module is ignored so the
                // remaining handles are still processed.
                for (int i = 0; i < _handles.Count; i++)
                {
                    try
                    {
                        Win32NativeMethods.FreeLibrary(_handles[i]);
                    }
                    catch
                    {
                    }
                }
            }
        }

        static DllList _dllList = new DllList();

        /// <summary>
        /// Returns a class factory for the given COM Class ID, obtained from
        /// the dll that implements the class.
        /// </summary>
        /// <param name="dllName">The dll where the COM class is implemented</param>
        /// <param name="filterPersistClass">The requested Class ID</param>
        /// <returns>
        /// IClassFactory instance used to create instances of that class, or
        /// null when GetClassFactoryFromDll could not produce one.
        /// </returns>
        internal static IClassFactory GetClassFactory(string dllName, string filterPersistClass)
        {
            // Thin forwarder: all the work happens in the dll-based lookup.
            return GetClassFactoryFromDll(dllName, filterPersistClass);
        }

        /// <summary>
        /// Loads <paramref name="dllName"/>, resolves its DllGetClassObject
        /// export and asks it for the IClassFactory of the given class.
        /// </summary>
        /// <param name="dllName">Path or name of the dll passed to LoadLibrary.</param>
        /// <param name="filterPersistClass">Class ID, in a format accepted by the Guid constructor.</param>
        /// <returns>
        /// The class factory, or null when the dll cannot be loaded, the export
        /// is missing, the call returns a non-zero result, or the returned
        /// object is not an IClassFactory.
        /// </returns>
        private static IClassFactory GetClassFactoryFromDll(string dllName, string filterPersistClass)
        {
            IntPtr module = Win32NativeMethods.LoadLibrary(dllName);
            if (module == IntPtr.Zero)
            {
                return null;
            }

            // The handle is recorded right away so the dll stays referenced
            // until the handle list is finalized, even if a later step fails.
            _dllList.AddDllHandle(module);

            IntPtr entryPoint = Win32NativeMethods.GetProcAddress(module, "DllGetClassObject");
            if (entryPoint == IntPtr.Zero)
            {
                return null;
            }

            // Wrap the native function pointer in a callable delegate.
            DllGetClassObject getClassObject =
                (DllGetClassObject)Marshal.GetDelegateForFunctionPointer(entryPoint, typeof(DllGetClassObject));

            Guid classId = new Guid(filterPersistClass);
            Guid factoryIid = new Guid("00000001-0000-0000-C000-000000000046"); // IID of IClassFactory
            object factory;
            int result = getClassObject(ref classId, ref factoryIid, out factory);
            if (result != 0)
            {
                return null;
            }

            return factory as IClassFactory;
        }
    }
    /// <summary>
    /// FilterLoader finds the dll and ClassID of the COM object responsible  
    /// for filtering a specific file extension. 
    /// It then loads that dll, creates the appropriate COM object and returns 
    /// a pointer to an IFilter instance
    /// </summary>
    /// <remarks>
    /// Developing IFilter Add-ins
    /// http://msdn2.microsoft.com/en-us/library/aa965717.aspx
    /// </remarks>
    static class FilterLoader
    {
        #region CacheEntry
        // Lookup result stored per file extension: the dll and the class id
        // that were resolved for it. The values are stored exactly as passed
        // to the constructor.
        private class CacheEntry
        {
            // Name/path of the dll that implements the filter.
            public string DllName;
            // Class id of the filter inside that dll.
            public string ClassName;

            public CacheEntry(string dllName, string className)
            {
                this.ClassName = className;
                this.DllName = dllName;
            }
        }
        #endregion

        static Dictionary<string, CacheEntry> _cache = new Dictionary<string, CacheEntry>();

        #region Registry Read String helper
        /// <summary>
        /// Reads the default (unnamed) value of a key under HKEY_LOCAL_MACHINE.
        /// </summary>
        /// <param name="key">Sub key path relative to HKEY_LOCAL_MACHINE.</param>
        /// <returns>The value as a string, or null when the key or value is missing or is not a string.</returns>
        static string ReadStrFromHKLM(string key)
        {
            return ReadStrFromHKLM(key, null);
        }

        /// <summary>
        /// Reads a string value from a key under HKEY_LOCAL_MACHINE.
        /// </summary>
        /// <param name="key">Sub key path relative to HKEY_LOCAL_MACHINE.</param>
        /// <param name="value">Name of the value to read; null reads the key's default value.</param>
        /// <returns>The value as a string, or null when the key or value is missing or is not a string.</returns>
        static string ReadStrFromHKLM(string key, string value)
        {
            // A using statement accepts a null resource, so the missing-key
            // case needs no separate disposal path.
            using (RegistryKey rk = Registry.LocalMachine.OpenSubKey(key))
            {
                if (rk == null)
                    return null;

                // 'as' instead of a hard cast: a value stored with a non-string
                // registry type would otherwise raise InvalidCastException and
                // abort the whole filter lookup.
                return rk.GetValue(value) as string;
            }
        }
        #endregion

        /// <summary>
        /// Finds and instantiates the IFilter implementation registered for a
        /// file extension.
        /// </summary>
        /// <param name="ext">The extension of the file</param>
        /// <returns>
        /// An IFilter instance used to retrieve text from that file type, or
        /// null when no dll/class pair is found for the extension.
        /// </returns>
        private static IFilter LoadIFilter(string ext)
        {
            string dllName;
            string filterPersistClass;

            // Without a dll and Class ID there is nothing to load.
            if (!GetFilterDllAndClass(ext, out dllName, out filterPersistClass))
            {
                return null;
            }

            return LoadFilterFromDll(dllName, filterPersistClass);
        }

        /// <summary>
        /// Loads and initializes the filter registered for the extension of
        /// <paramref name="fileName"/>.
        /// </summary>
        /// <param name="fileName">Path of the file to be filtered.</param>
        /// <returns>An initialized IFilter, or null when none could be loaded or initialized.</returns>
        internal static IFilter LoadAndInitIFilter(string fileName)
        {
            return LoadAndInitIFilter(fileName, Path.GetExtension(fileName));
        }

        /// <summary>
        /// Loads the filter registered for <paramref name="extension"/>, binds
        /// it to <paramref name="fileName"/> through IPersistFile and
        /// initializes it.
        /// </summary>
        /// <param name="fileName">Path of the file to be filtered.</param>
        /// <param name="extension">Extension used to look up the filter.</param>
        /// <returns>
        /// The initialized IFilter, or null when no extension is given, no
        /// filter is found, the filter does not implement IPersistFile, or
        /// Init does not return S_OK.
        /// </returns>
        internal static IFilter LoadAndInitIFilter(string fileName, string extension)
        {
            // A null extension would fail inside the cache lookup, so report
            // "no filter" instead. An empty extension is forwarded unchanged.
            if (extension == null)
                return null;

            IFilter filter = LoadIFilter(extension);
            if (filter == null)
                return null;

            bool initialized = false;
            try
            {
                IPersistFile persistFile = (filter as IPersistFile);
                if (persistFile != null)
                {
                    persistFile.Load(fileName, 0);

                    IFILTER_INIT iflags =
                        IFILTER_INIT.CANON_HYPHENS |
                        IFILTER_INIT.CANON_PARAGRAPHS |
                        IFILTER_INIT.CANON_SPACES |
                        IFILTER_INIT.HARD_LINE_BREAKS |
                        IFILTER_INIT.FILTER_OWNED_VALUE_OK |
                        IFILTER_INIT.APPLY_INDEX_ATTRIBUTES |
                        IFILTER_INIT.APPLY_CRAWL_ATTRIBUTES |
                        IFILTER_INIT.APPLY_OTHER_ATTRIBUTES;

                    IFILTER_FLAGS flags;
                    initialized = filter.Init(iflags, 0, IntPtr.Zero, out flags) == IFilterReturnCode.S_OK;
                }
            }
            finally
            {
                // Release the filter whenever it is not handed to the caller:
                // no IPersistFile, Init failed, or Load/Init threw.
                if (!initialized)
                    Marshal.ReleaseComObject(filter);
            }

            return initialized ? filter : null;
        }

        /// <summary>
        /// Creates an IFilter instance of class <paramref name="filterPersistClass"/>
        /// using the class factory of <paramref name="dllName"/>.
        /// </summary>
        /// <param name="dllName">The dll that implements the filter class.</param>
        /// <param name="filterPersistClass">Class ID of the filter.</param>
        /// <returns>
        /// The created filter, or null when no class factory is available or
        /// the created object does not expose IFilter.
        /// </returns>
        private static IFilter LoadFilterFromDll(string dllName, string filterPersistClass)
        {
            //Get a classFactory for our classID
            IClassFactory classFactory = ComHelper.GetClassFactory(dllName, filterPersistClass);
            if (classFactory == null)
                return null;

            try
            {
                //And create an IFilter instance using that class factory
                // http://msdn2.microsoft.com/en-us/library/aa965717.aspx
                // {89BCB740-6119-101A-BCB7-00DD010655AF} is IID_IFilter; it is the same for every filter.
                Guid IFilterGUID = new Guid("89BCB740-6119-101A-BCB7-00DD010655AF");
                Object obj;
                classFactory.CreateInstance(null, ref IFilterGUID, out obj);

                IFilter filter = obj as IFilter;
                // An object that cannot be used as an IFilter is of no use to
                // the caller; release it instead of leaving it to the GC.
                if (filter == null && obj != null)
                    Marshal.ReleaseComObject(obj);
                return filter;
            }
            finally
            {
                // The factory is only needed for the CreateInstance call, so
                // its reference is dropped deterministically here.
                Marshal.ReleaseComObject(classFactory);
            }
        }

        /// <summary>
        /// Resolves the dll and Class ID of the filter for an extension, first
        /// from the cache and otherwise from the registry. Registry results,
        /// including unsuccessful ones, are added to the cache.
        /// </summary>
        /// <param name="ext">The file extension to look up.</param>
        /// <param name="dllName">Receives the dll name, or null.</param>
        /// <param name="filterPersistClass">Receives the filter Class ID, or null.</param>
        /// <returns>true when both the dll name and the Class ID are non-null.</returns>
        private static bool GetFilterDllAndClass(string ext, out string dllName, out string filterPersistClass)
        {
            bool cached = GetFilterDllAndClassFromCache(ext, out dllName, out filterPersistClass);
            if (!cached)
            {
                // On a cache miss both out values are null at this point and
                // are only overwritten when a persistent handler is found.
                string handlerClass = GetPersistentHandlerClass(ext, true);
                if (handlerClass != null)
                {
                    GetFilterDllAndClassFromPersistentHandler(handlerClass, out dllName, out filterPersistClass);
                }

                AddExtensionToCache(ext, dllName, filterPersistClass);
            }

            return !(dllName == null || filterPersistClass == null);
        }

        /// <summary>
        /// Stores the lookup result for an extension in the cache, keyed by
        /// the lower-cased extension. An existing entry is left untouched.
        /// </summary>
        /// <param name="ext">The file extension.</param>
        /// <param name="dllName">Dll name to store; stored as given.</param>
        /// <param name="filterPersistClass">Class ID to store; stored as given.</param>
        private static void AddExtensionToCache(string ext, string dllName, string filterPersistClass)
        {
            lock (_cache)
            {
                string key = ext.ToLower();
                if (_cache.ContainsKey(key))
                {
                    return;
                }

                _cache.Add(key, new CacheEntry(dllName, filterPersistClass));
            }
        }

        /// <summary>
        /// Reads, for a persistent handler class, the Class ID registered for
        /// the IFilter interface and the InprocServer32 dll of that class.
        /// </summary>
        /// <param name="persistentHandlerClass">Class ID of the persistent handler.</param>
        /// <param name="dllName">Receives the dll name, or null when it was not read.</param>
        /// <param name="filterPersistClass">Receives the filter Class ID as read from the registry.</param>
        /// <returns>true when both values were found and are non-empty.</returns>
        private static bool GetFilterDllAndClassFromPersistentHandler(string persistentHandlerClass, out string dllName, out string filterPersistClass)
        {
            dllName = null;

            // Class ID registered under the handler for IID_IFilter.
            string handlerKey = @"Software\Classes\CLSID\" + persistentHandlerClass +
              @"\PersistentAddinsRegistered\{89BCB740-6119-101A-BCB7-00DD010655AF}";
            filterPersistClass = ReadStrFromHKLM(handlerKey);
            if (String.IsNullOrEmpty(filterPersistClass))
            {
                return false;
            }

            // Dll that implements that class.
            string serverKey = @"Software\Classes\CLSID\" + filterPersistClass +
              @"\InprocServer32";
            dllName = ReadStrFromHKLM(serverKey);

            return dllName != null && dllName.Length > 0;
        }

        /// <summary>
        /// Looks up the persistent handler class for an extension, trying in
        /// order: the extension itself, its document type and, optionally, its
        /// content type.
        /// </summary>
        /// <param name="ext">The file extension.</param>
        /// <param name="searchContentType">Whether the content type lookup may be used as last resort.</param>
        /// <returns>The result of the last lookup that was attempted; may be null or empty.</returns>
        private static string GetPersistentHandlerClass(string ext, bool searchContentType)
        {
            string handler = GetPersistentHandlerClassFromExtension(ext);
            if (!String.IsNullOrEmpty(handler))
            {
                return handler;
            }

            handler = GetPersistentHandlerClassFromDocumentType(ext);
            if (!searchContentType || !String.IsNullOrEmpty(handler))
            {
                return handler;
            }

            return GetPersistentHandlerClassFromContentType(ext);
        }

        /// <summary>
        /// Looks up the persistent handler through the extension's
        /// "Content Type": the content type is mapped to its registered
        /// extension and the lookup is repeated for that extension.
        /// </summary>
        /// <param name="ext">The file extension.</param>
        /// <returns>
        /// The persistent handler class, or null when the extension has no
        /// content type, the content type has no registered extension, or that
        /// extension is the one already being looked up.
        /// </returns>
        private static string GetPersistentHandlerClassFromContentType(string ext)
        {
            string contentType = ReadStrFromHKLM(@"Software\Classes\" + ext, "Content Type");
            if (String.IsNullOrEmpty(contentType))
                return null;

            string contentTypeExtension = ReadStrFromHKLM(@"Software\Classes\MIME\Database\Content Type\" + contentType,
                "Extension");

            // Without a registered extension there is nothing to retry with.
            // Continuing would build registry paths from a null extension.
            if (String.IsNullOrEmpty(contentTypeExtension))
                return null;

            // Extensions are identifiers, not linguistic text, so they are
            // compared ordinally rather than with the current culture.
            if (ext.Equals(contentTypeExtension, StringComparison.OrdinalIgnoreCase))
                return null; //No need to look further. This extension does not have any persistent handler

            //We know the extension that is associated with that content type. Simply try again with the new extension
            return GetPersistentHandlerClass(contentTypeExtension, false); //Don't search content type this time.
        }

        /// <summary>
        /// Looks up the persistent handler through the extension's document
        /// type: extension -> document type -> Class ID -> PersistentHandler.
        /// </summary>
        /// <param name="ext">The file extension.</param>
        /// <returns>The persistent handler class, or null when any step of the chain is missing.</returns>
        private static string GetPersistentHandlerClassFromDocumentType(string ext)
        {
            //Get the DocumentType of this file extension
            string docType = ReadStrFromHKLM(@"Software\Classes\" + ext);
            if (String.IsNullOrEmpty(docType))
                return null;

            //Get the Class ID for this document type
            string docClass = ReadStrFromHKLM(@"Software\Classes\" + docType + @"\CLSID");
            // Check the value just read. Testing docType a second time would
            // let a missing Class ID through and query "CLSID\\PersistentHandler".
            if (String.IsNullOrEmpty(docClass))
                return null;

            //Now get the PersistentHandler for that Class ID
            return ReadStrFromHKLM(@"Software\Classes\CLSID\" + docClass + @"\PersistentHandler");
        }

        /// <summary>
        /// Reads the PersistentHandler registered directly under the extension's key.
        /// </summary>
        /// <param name="ext">The file extension.</param>
        /// <returns>The default value of the PersistentHandler sub key, as returned by ReadStrFromHKLM.</returns>
        private static string GetPersistentHandlerClassFromExtension(string ext)
        {
            string keyPath = @"Software\Classes\" + ext + @"\PersistentHandler";
            return ReadStrFromHKLM(keyPath);
        }

        /// <summary>
        /// Looks up the cached dll name and Class ID for an extension, using
        /// the lower-cased extension as key.
        /// </summary>
        /// <param name="ext">The file extension.</param>
        /// <param name="dllName">Receives the cached dll name, or null on a miss.</param>
        /// <param name="filterPersistClass">Receives the cached Class ID, or null on a miss.</param>
        /// <returns>true when the extension has a cache entry, whatever values it holds.</returns>
        private static bool GetFilterDllAndClassFromCache(string ext, out string dllName, out string filterPersistClass)
        {
            string key = ext.ToLower();
            CacheEntry entry;
            bool found;

            lock (_cache)
            {
                found = _cache.TryGetValue(key, out entry);
            }

            if (found)
            {
                dllName = entry.DllName;
                filterPersistClass = entry.ClassName;
            }
            else
            {
                dllName = null;
                filterPersistClass = null;
            }
            return found;
        }
    }
    /// <summary>
    /// Implements a TextReader that reads the text chunks produced by an
    /// IFilter. Only chunks flagged CHUNK_TEXT are read; other chunks are
    /// skipped.
    /// </summary>
    internal class FilterReader : TextReader
    {
        // The filter being read; null once the reader has been disposed.
        IFilter _filter;
        // Set when the filter has no more chunks or reported a fatal error.
        private bool _done;
        private STAT_CHUNK _currentChunk;
        // True while the current chunk is a text chunk that may hold more text.
        private bool _currentChunkValid;
        // Text fetched from the filter that did not fit the caller's buffer.
        private char[] _charsLeftFromLastRead;

        /// <summary>
        /// Releases the filter and suppresses finalization.
        /// </summary>
        public override void Close()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        ~FilterReader()
        {
            Dispose(false);
        }

        /// <summary>
        /// Releases the underlying filter. Safe to call more than once.
        /// </summary>
        /// <param name="disposing">true when called from Close/Dispose, false from the finalizer.</param>
        protected override void Dispose(bool disposing)
        {
            // On the finalizer thread the runtime callable wrapper may already
            // have been finalized, so it is only released on the explicit path.
            if (disposing)
            {
                IFilter filter = _filter;
                // Clear the field first so a second Close/Dispose cannot
                // release the same wrapper again.
                _filter = null;
                if (filter != null)
                    Marshal.ReleaseComObject(filter);
            }
            base.Dispose(disposing);
        }

        /// <summary>
        /// Reads up to <paramref name="count"/> characters of filtered text
        /// into <paramref name="array"/>, starting at <paramref name="offset"/>.
        /// </summary>
        /// <param name="array">Destination buffer.</param>
        /// <param name="offset">Index in <paramref name="array"/> at which to start writing.</param>
        /// <param name="count">Maximum number of characters to read.</param>
        /// <returns>The number of characters read; 0 once the filter has no more text.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> or <paramref name="count"/> is negative.</exception>
        /// <exception cref="ArgumentException">The range does not fit in <paramref name="array"/>.</exception>
        /// <exception cref="ObjectDisposedException">The reader has been closed.</exception>
        public override int Read(char[] array, int offset, int count)
        {
            if (array == null)
                throw new ArgumentNullException("array");
            if (offset < 0)
                throw new ArgumentOutOfRangeException("offset");
            if (count < 0)
                throw new ArgumentOutOfRangeException("count");
            if (array.Length - offset < count)
                throw new ArgumentException("offset and count exceed the length of array");
            if (_filter == null)
                throw new ObjectDisposedException("FilterReader");

            int endOfChunksCount = 0;
            int charsRead = 0;

            while (!_done && charsRead < count)
            {
                // Serve text left over from a previous read-ahead first.
                if (_charsLeftFromLastRead != null)
                {
                    int charsToCopy = (_charsLeftFromLastRead.Length < count - charsRead) ? _charsLeftFromLastRead.Length : count - charsRead;
                    Array.Copy(_charsLeftFromLastRead, 0, array, offset + charsRead, charsToCopy);
                    charsRead += charsToCopy;
                    if (charsToCopy < _charsLeftFromLastRead.Length)
                    {
                        char[] tmp = new char[_charsLeftFromLastRead.Length - charsToCopy];
                        Array.Copy(_charsLeftFromLastRead, charsToCopy, tmp, 0, tmp.Length);
                        _charsLeftFromLastRead = tmp;
                    }
                    else
                        _charsLeftFromLastRead = null;
                    continue;
                }

                if (!_currentChunkValid)
                {
                    IFilterReturnCode res = _filter.GetChunk(out _currentChunk);
                    _currentChunkValid = (res == IFilterReturnCode.S_OK) && ((_currentChunk.flags & CHUNKSTATE.CHUNK_TEXT) != 0);

                    if (res == IFilterReturnCode.FILTER_E_END_OF_CHUNKS)
                    {
                        // NOTE(review): the end is only accepted after two
                        // end-of-chunks results within one Read call. This is
                        // kept as-is, presumably to tolerate filters that
                        // report it prematurely; confirm.
                        endOfChunksCount++;
                        if (endOfChunksCount > 1)
                            _done = true; //That's it. no more chunks available
                    }
                    else if (IsFailure(res)
                        && res != IFilterReturnCode.FILTER_E_EMBEDDING_UNAVAILABLE
                        && res != IFilterReturnCode.FILTER_E_LINK_UNAVAILABLE)
                    {
                        // Any other failure (access denied, password, unknown
                        // format, ...) would repeat on every call, so stop
                        // instead of spinning forever. The two "unavailable"
                        // codes only concern one embedded or linked object, so
                        // the next chunk is still requested.
                        _done = true;
                    }
                }

                if (_currentChunkValid)
                {
                    uint bufLength = (uint)(count - charsRead);
                    if (bufLength < 8192)
                        bufLength = 8192; //Read ahead

                    char[] buffer = new char[bufLength];
                    IFilterReturnCode res = _filter.GetText(ref bufLength, buffer);
                    if (res == IFilterReturnCode.S_OK || res == IFilterReturnCode.FILTER_S_LAST_TEXT)
                    {
                        int cRead = (int)bufLength;
                        if (cRead + charsRead > count)
                        {
                            // More text than requested: keep the surplus for the next call.
                            int charsLeft = (cRead + charsRead - count);
                            _charsLeftFromLastRead = new char[charsLeft];
                            Array.Copy(buffer, cRead - charsLeft, _charsLeftFromLastRead, 0, charsLeft);
                            cRead -= charsLeft;
                        }
                        else
                            _charsLeftFromLastRead = null;

                        Array.Copy(buffer, 0, array, offset + charsRead, cRead);
                        charsRead += cRead;
                    }

                    // Only S_OK means the chunk may hold more text. Last-text,
                    // no-more-text and every error code end the chunk; keeping
                    // it "valid" after an error would call GetText forever.
                    if (res != IFilterReturnCode.S_OK)
                        _currentChunkValid = false;
                }
            }
            return charsRead;
        }

        // HRESULT-style codes signal failure through the most significant bit.
        private static bool IsFailure(IFilterReturnCode code)
        {
            return ((uint)code & 0x80000000u) != 0;
        }

        /// <summary>
        /// Creates a reader over the text of <paramref name="fileName"/>.
        /// </summary>
        /// <param name="fileName">Path of the file to read.</param>
        /// <exception cref="ArgumentException">No filter could be loaded and initialized for the file.</exception>
        public FilterReader(string fileName)
        {
            _filter = FilterLoader.LoadAndInitIFilter(fileName);
            if (_filter == null)
                throw new ArgumentException("no filter defined for " + fileName);
        }
    }
    /// <summary>
    /// Identifies a property by the GUID of its property set plus a PROPSPEC.
    /// Used as the type of the <c>attribute</c> field of STAT_CHUNK.
    /// Field order is part of the marshaled layout and must not change.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct FULLPROPSPEC
    {
        // GUID of the property set the property belongs to.
        public Guid guidPropSet;
        // The property within that set (by id or by name, see PROPSPEC).
        public PROPSPEC psProperty;
    }

    /// <summary>
    /// Region descriptor passed to IFilter.BindRegion. Field order is part of
    /// the marshaled layout and must not change.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    internal struct FILTERREGION
    {
        // Chunk identifier (presumably the idChunk reported in STAT_CHUNK - TODO confirm).
        public int idChunk;
        // Start of the region within the chunk (presumably a character offset - TODO confirm).
        public int cwcStart;
        // Extent of the region (presumably a character count - TODO confirm).
        public int cwcExtent;
    }

    /// <summary>
    /// Identifies a property either by numeric id or by name. The native
    /// structure is a 4-byte kind followed by a union of a PROPID and a string
    /// pointer.
    /// </summary>
    /// <remarks>
    /// The union contains a pointer, so it is pointer-aligned: offset 4 in a
    /// 32-bit process but offset 8 in a 64-bit process. A hard-coded
    /// FieldOffset(4) only matches the 32-bit layout. Sequential layout with a
    /// pointer-sized field lets the marshaler place the union correctly on
    /// both platforms.
    /// </remarks>
    [StructLayout(LayoutKind.Sequential)]
    public struct PROPSPEC
    {
        public int ulKind;     // 0 - string used; 1 - PROPID

        // Storage of the union. Holds the string pointer when ulKind is 0.
        public IntPtr lpwstr;

        // View of the union as a property id: the low 32 bits of the storage
        // (Windows is little-endian, so these are the bytes a native PROPID
        // occupies).
        public int propid
        {
            get { return unchecked((int)lpwstr.ToInt64()); }
            set { lpwstr = new IntPtr(value); }
        }
    }

    /// <summary>
    /// Flags reported by a filter through the <c>pdwFlags</c> out parameter
    /// of IFilter.Init.
    /// </summary>
    [Flags]
    internal enum IFILTER_FLAGS
    {
        /// <summary>
        /// The caller should use the IPropertySetStorage and IPropertyStorage
        /// interfaces to locate additional properties.
        /// When this flag is set, properties available through COM
        /// enumerators should not be returned from IFilter.
        /// </summary>
        IFILTER_FLAGS_OLE_PROPERTIES = 1
    }

    /// <summary>
    /// Flags passed to IFilter.Init to control text standardization, property
    /// output and the way the filter is accessed.
    /// </summary>
    [Flags]
    internal enum IFILTER_INIT
    {
        /// <summary>No flag set.</summary>
        NONE = 0x000,

        /// <summary>
        /// Paragraph breaks should be marked with the Unicode PARAGRAPH
        /// SEPARATOR (0x2029).
        /// </summary>
        CANON_PARAGRAPHS = 0x001,

        /// <summary>
        /// Soft returns, such as the newline character in Microsoft Word,
        /// should be replaced by hard returns - LINE SEPARATOR (0x2028).
        /// Existing hard returns can be doubled. A carriage return (0x000D),
        /// a line feed (0x000A), or the two in combination should be
        /// considered a hard return. The intent is to enable
        /// pattern-expression matches against observed line breaks.
        /// </summary>
        HARD_LINE_BREAKS = 0x002,

        /// <summary>
        /// Word processors have forms of hyphens that are not represented in
        /// the host character set, such as optional hyphens (appearing only at
        /// the end of a line) and non-breaking hyphens. With this flag,
        /// optional hyphens are converted to nulls and non-breaking hyphens to
        /// normal hyphens (0x2010) or HYPHEN-MINUSES (0x002D).
        /// </summary>
        CANON_HYPHENS = 0x004,

        /// <summary>
        /// Standardizes spaces the way CANON_HYPHENS standardizes hyphens: all
        /// special space characters, such as non-breaking spaces, are
        /// converted to the standard space character (0x0020).
        /// </summary>
        CANON_SPACES = 0x008,

        /// <summary>
        /// The client wants text split into chunks representing internal
        /// value-type properties.
        /// </summary>
        APPLY_INDEX_ATTRIBUTES = 0x010,

        /// <summary>
        /// Any properties not covered by the APPLY_INDEX_ATTRIBUTES and
        /// APPLY_CRAWL_ATTRIBUTES flags should be emitted.
        /// </summary>
        APPLY_OTHER_ATTRIBUTES = 0x020,

        /// <summary>
        /// Optimizes IFilter for indexing because the client calls
        /// IFilter::Init only once and does not call IFilter::BindRegion.
        /// This eliminates the possibility of accessing a chunk both before
        /// and after accessing another chunk.
        /// </summary>
        INDEXING_ONLY = 0x040,

        /// <summary>
        /// The text extraction process must recursively search all linked
        /// objects within the document. If a link is unavailable, the
        /// IFilter::GetChunk call that would have obtained the first chunk of
        /// the link should return FILTER_E_LINK_UNAVAILABLE.
        /// </summary>
        SEARCH_LINKS = 0x080,

        /// <summary>
        /// The client wants text split into chunks representing properties
        /// determined during the indexing process.
        /// </summary>
        APPLY_CRAWL_ATTRIBUTES = 0x100,

        /// <summary>
        /// The content indexing process can return property values set by the
        /// filter.
        /// </summary>
        FILTER_OWNED_VALUE_OK = 0x200
    }

    /// <summary>
    /// Description of the current chunk, filled in by IFilter.GetChunk.
    /// Field order and types are part of the marshaled layout and must not
    /// change.
    /// </summary>
    public struct STAT_CHUNK
    {
        /// <summary>
        /// The chunk identifier. Chunk identifiers must be unique for the
        /// current instance of the IFilter interface.
        /// Chunk identifiers must be in ascending order. The order in which
        /// chunks are numbered should correspond to the order in which they
        /// appear in the source document. Some search engines can take
        /// advantage of the proximity of chunks of various properties. If so,
        /// the order in which chunks with different properties are emitted
        /// will be important to the search engine.
        /// </summary>
        public int idChunk;

        /// <summary>
        /// The type of break that separates the previous chunk from the
        /// current chunk. Values are from the CHUNK_BREAKTYPE enumeration.
        /// </summary>
        [MarshalAs(UnmanagedType.U4)]
        public CHUNK_BREAKTYPE breakType;

        /// <summary>
        /// Flags indicating whether this chunk contains a text-type or a
        /// value-type property. Flag values are taken from the CHUNKSTATE
        /// enumeration.
        /// If the CHUNK_TEXT flag is set, IFilter::GetText should be used to
        /// retrieve the contents of the chunk as a series of words.
        /// If the CHUNK_VALUE flag is set, IFilter::GetValue should be used to
        /// retrieve the value and treat it as a single property value.
        /// If the filter dictates that the same content be treated as both
        /// text and as a value, the chunk should be emitted twice in two
        /// different chunks, each with one flag set.
        /// </summary>
        [MarshalAs(UnmanagedType.U4)]
        public CHUNKSTATE flags;

        /// <summary>
        /// The language and sublanguage associated with a chunk of text.
        /// Chunk locale is used by document indexers to perform proper word
        /// breaking of text. If the chunk is neither text-type nor a
        /// value-type with data type VT_LPWSTR, VT_LPSTR or VT_BSTR, this
        /// field is ignored.
        /// </summary>
        public int locale;

        /// <summary>
        /// The property to be applied to the chunk. If a filter requires that
        /// the same text have more than one property, it needs to emit the
        /// text once for each property in separate chunks.
        /// </summary>
        public FULLPROPSPEC attribute;

        /// <summary>
        /// The ID of the source of a chunk. The value of the idChunkSource
        /// member depends on the nature of the chunk:
        /// If the chunk is a text-type property, the value of the
        /// idChunkSource member must be the same as the value of the idChunk
        /// member.
        /// If the chunk is an internal value-type property derived from
        /// textual content, the value of the idChunkSource member is the chunk
        /// ID for the text-type chunk from which it is derived.
        /// If the filter attributes specify to return only internal value-type
        /// properties, there is no content chunk from which to derive the
        /// current internal value-type property. In this case, the value of
        /// the idChunkSource member must be set to zero, which is an invalid
        /// chunk.
        /// </summary>
        public int idChunkSource;

        /// <summary>
        /// The offset from which the source text for a derived chunk starts in
        /// the source chunk.
        /// </summary>
        public int cwcStartSource;

        /// <summary>
        /// The length in characters of the source text from which the current
        /// chunk was derived.
        /// A zero value signifies character-by-character correspondence
        /// between the source text and the derived text. A nonzero value means
        /// that no such direct correspondence exists.
        /// </summary>
        public int cwcLenSource;
    }

    /// <summary>
    /// Enumerates the types of break that can separate the current chunk from
    /// the previous one. Used as the type of the <c>breakType</c> field of
    /// STAT_CHUNK.
    /// </summary>
    public enum CHUNK_BREAKTYPE
    {
        /// <summary>
        /// No break is placed between the current chunk and the previous chunk.
        /// The chunks are glued together.
        /// </summary>
        CHUNK_NO_BREAK = 0,
        /// <summary>
        /// A word break is placed between this chunk and the previous chunk
        /// that had the same attribute.
        /// Use of CHUNK_EOW should be minimized because the choice of word
        /// breaks is language-dependent, so determining word breaks is best
        /// left to the search engine.
        /// </summary>
        CHUNK_EOW = 1,
        /// <summary>
        /// A sentence break is placed between this chunk and the previous
        /// chunk that had the same attribute.
        /// </summary>
        CHUNK_EOS = 2,
        /// <summary>
        /// A paragraph break is placed between this chunk and the previous
        /// chunk that had the same attribute.
        /// </summary>
        CHUNK_EOP = 3,
        /// <summary>
        /// A chapter break is placed between this chunk and the previous chunk
        /// that had the same attribute.
        /// </summary>
        CHUNK_EOC = 4
    }


    /// <summary>
    /// Bit flags describing what kind of property a chunk carries. Used as the
    /// type of the <c>flags</c> field of STAT_CHUNK, where individual bits are
    /// tested with a bitwise AND; hence the Flags attribute.
    /// </summary>
    [Flags]
    public enum CHUNKSTATE
    {
        /// <summary>
        /// The current chunk is a text-type property.
        /// </summary>
        CHUNK_TEXT = 0x1,
        /// <summary>
        /// The current chunk is a value-type property.
        /// </summary>
        CHUNK_VALUE = 0x2,
        /// <summary>
        /// Reserved
        /// </summary>
        CHUNK_FILTER_OWNED_VALUE = 0x4
    }

    /// <summary>
    /// HRESULT values returned by the IFilter methods declared with
    /// PreserveSig. Values with the most significant bit set are failures;
    /// the others are success or informational codes.
    /// </summary>
    internal enum IFilterReturnCode : uint
    {
        /// <summary>
        /// Success
        /// </summary>
        S_OK = 0,
        /// <summary>
        /// The function was denied access to the filter file.
        /// </summary>
        E_ACCESSDENIED = 0x80070005,
        /// <summary>
        /// The function encountered an invalid handle,
        /// probably due to a low-memory situation.
        /// </summary>
        E_HANDLE = 0x80070006,
        /// <summary>
        /// The function received an invalid parameter.
        /// </summary>
        E_INVALIDARG = 0x80070057,
        /// <summary>
        /// Out of memory
        /// </summary>
        E_OUTOFMEMORY = 0x8007000E,
        /// <summary>
        /// Not implemented
        /// </summary>
        E_NOTIMPL = 0x80004001,
        /// <summary>
        /// Unspecified failure
        /// </summary>
        // 0x80004005 is the Win32 value of E_FAIL. 0x80000008 is the value
        // from 16-bit Windows and is never returned by Win32 components, so a
        // comparison against it could not match.
        E_FAIL = 0x80004005,
        /// <summary>
        /// File not filtered due to password protection
        /// </summary>
        FILTER_E_PASSWORD = 0x8004170B,
        /// <summary>
        /// The document format is not recognised by the filter
        /// </summary>
        FILTER_E_UNKNOWNFORMAT = 0x8004170C,
        /// <summary>
        /// No text in current chunk
        /// </summary>
        FILTER_E_NO_TEXT = 0x80041705,
        /// <summary>
        /// No more chunks of text available in object
        /// </summary>
        FILTER_E_END_OF_CHUNKS = 0x80041700,
        /// <summary>
        /// No more text available in chunk
        /// </summary>
        FILTER_E_NO_MORE_TEXT = 0x80041701,
        /// <summary>
        /// No more property values available in chunk
        /// </summary>
        FILTER_E_NO_MORE_VALUES = 0x80041702,
        /// <summary>
        /// Unable to access object
        /// </summary>
        FILTER_E_ACCESS = 0x80041703,
        /// <summary>
        /// Moniker doesn't cover entire region
        /// </summary>
        FILTER_W_MONIKER_CLIPPED = 0x00041704,
        /// <summary>
        /// Unable to bind IFilter for embedded object
        /// </summary>
        FILTER_E_EMBEDDING_UNAVAILABLE = 0x80041707,
        /// <summary>
        /// Unable to bind IFilter for linked object
        /// </summary>
        FILTER_E_LINK_UNAVAILABLE = 0x80041708,
        /// <summary>
        /// This is the last text in the current chunk
        /// </summary>
        FILTER_S_LAST_TEXT = 0x00041709,
        /// <summary>
        /// This is the last value in the current chunk
        /// </summary>
        FILTER_S_LAST_VALUES = 0x0004170A
    }

    /// <summary>
    /// Managed declaration of the COM IFilter interface
    /// (IID 89BCB740-6119-101A-BCB7-00DD010655AF). The order of the methods is
    /// the order of the interface's vtable slots and must not change. All
    /// methods use PreserveSig, so failures are reported through the return
    /// value rather than as exceptions.
    /// </summary>
    [ComImport, Guid("89BCB740-6119-101A-BCB7-00DD010655AF")]
    [InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
    internal interface IFilter
    {
        /// <summary>
        /// The IFilter::Init method initializes a filtering session.
        /// </summary>
        [PreserveSig]
        IFilterReturnCode Init(
            // [in] Flag settings from the IFILTER_INIT enumeration for
            // controlling text standardization, property output, embedding
            // scope, and IFilter access patterns.
          IFILTER_INIT grfFlags,

            // [in] The size of the attributes array. When nonzero, cAttributes
            // takes precedence over attributes specified in grfFlags. If no
            // attribute flags are specified and cAttributes is zero, the
            // default is given by the PSGUID_STORAGE storage property set,
            // which contains the date and time of the last write to the file,
            // size, and so on; and by the PID_STG_CONTENTS 'contents'
            // property, which maps to the main contents of the file.
            // For more information about properties and property sets, see
            // Property Sets.
          int cAttributes,

            // [in] Array of pointers to FULLPROPSPEC structures for the
            // requested properties. When cAttributes is nonzero, only the
            // properties in aAttributes are returned.
          IntPtr aAttributes,

            // [out] Information about additional properties available to the
            // caller; from the IFILTER_FLAGS enumeration.
          out IFILTER_FLAGS pdwFlags);

        /// <summary>
        /// The IFilter::GetChunk method positions the filter at the beginning
        /// of the next chunk, or at the first chunk if this is the first call
        /// to the GetChunk method, and returns a description of the current
        /// chunk.
        /// </summary>
        [PreserveSig]
        IFilterReturnCode GetChunk(out STAT_CHUNK pStat);

        /// <summary>
        /// The IFilter::GetText method retrieves text (text-type properties)
        /// from the current chunk, which must have a CHUNKSTATE enumeration
        /// value of CHUNK_TEXT.
        /// </summary>
        [PreserveSig]
        IFilterReturnCode GetText(
            // [in/out] On entry, the size of the awcBuffer array in
            // wide/Unicode characters. On exit, the number of Unicode
            // characters written to awcBuffer.
            // Note that this value is not the number of bytes in the buffer.
          ref uint pcwcBuffer,

            // [out] Text retrieved from the current chunk. The filter does not
            // terminate the buffer with a null character; use pcwcBuffer for
            // the length.
          [Out(), MarshalAs(UnmanagedType.LPArray)] 
      char[] awcBuffer);

        /// <summary>
        /// The IFilter::GetValue method retrieves a value (internal
        /// value-type property) from a chunk, which must have a CHUNKSTATE
        /// enumeration value of CHUNK_VALUE.
        /// </summary>
        /// <remarks>
        /// IFilter::GetValue
        /// http://msdn2.microsoft.com/en-us/library/ms690927.aspx
        /// Filtering File Properties
        /// http://msdn2.microsoft.com/en-us/library/ms692552.aspx
        /// </remarks>
        [PreserveSig]
        int GetValue(
            // Receives a pointer to a PROPVARIANT structure allocated with
            // CoTaskMemAlloc. Some PROPVARIANT structures contain pointers,
            // which can be freed by calling the PropVariantClear function.
            // It is up to the caller of the GetValue method to call
            // PropVariantClear.
          ref IntPtr PropVal);

        /// <summary>
        /// The IFilter::BindRegion method retrieves an interface representing
        /// the specified portion of the object.
        /// Currently reserved for future use.
        /// </summary>
        // NOTE(review): the native declaration appears to take FILTERREGION by
        // value and to return the interface through an out pointer; the ref
        // parameters below should be confirmed against filter.h before this
        // method is ever called. It is not called anywhere in this file.
        [PreserveSig]
        int BindRegion(ref FILTERREGION origPos,
          ref Guid riid, ref object ppunk);
    }
    #endregion
}
